{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "False\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2024-03-29 18:03:54.769532: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n",
      "2024-03-29 18:03:54.769695: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2211] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.\n",
      "Skipping registering GPU devices...\n"
     ]
    }
   ],
   "source": [
    "# libraries\n",
    "import tensorrt as trt\n",
    "import torch\n",
    "import numpy as np\n",
    "from tensorflow.keras import optimizers\n",
    "import matplotlib.pyplot as plt\n",
    "from tensorflow.keras import initializers\n",
    "from tensorflow.keras import regularizers\n",
    "from tensorflow.keras.models import Sequential, Model\n",
    "from tensorflow.keras.callbacks import ModelCheckpoint\n",
    "from tensorflow.keras.layers import concatenate\n",
    "from tensorflow.keras.utils import to_categorical\n",
    "from tensorflow.keras.layers import Dense, Flatten, Dropout, Input, BatchNormalization, PReLU\n",
    "import tensorflow as tf\n",
    "\n",
    "print(tf.test.is_gpu_available())\n",
    "# for running on multiple GPU\n",
    "import os\n",
    "os.environ[\"CUDA_DEVICE_ORDER\"]=\"PCI_BUS_ID\"\n",
    "import threading\n",
    "os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"0\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2024-03-29 18:00:13.902410: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n",
      "2024-03-29 18:00:13.902569: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2211] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.\n",
      "Skipping registering GPU devices...\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "False"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### text embeddings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pickle\n",
    "with open('politifact/finalTrainEmbeddings.pkl', 'rb') as f:\n",
    "    trainEmbeddings = pickle.load(f)\n",
    "with open('politifact/finalTestEmbeddings.pkl', 'rb') as f:\n",
    "    testEmbeddings = pickle.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# dictionary---> Article text: ( label, articleURL, ImageId )\n",
    "import json\n",
    "with open('imageDataset/politi/trainJson.json', 'r') as f:\n",
    "    trainData = json.load(f)\n",
    "with open('imageDataset/politi/testJson.json', 'r') as f:\n",
    "    testData = json.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "for i in trainEmbeddings:\n",
    "    trainEmbeddings[i] = [torch.mean(j[0], axis=1) for j in trainEmbeddings[i]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "for i in testEmbeddings:\n",
    "    testEmbeddings[i] = [torch.mean(j[0], axis=1) for j in testEmbeddings[i]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "for i in testEmbeddings:\n",
    "    temp = testEmbeddings[i]\n",
    "    break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# padding\n",
    "# if a paragraph has more than 50 sentences then crop, if less than 50 then pad.\n",
    "\n",
    "for i in trainEmbeddings:\n",
    "    if len(trainEmbeddings[i]) >=50:\n",
    "        trainEmbeddings[i] = trainEmbeddings[i][0:50]\n",
    "    else:\n",
    "        deficit = 50 - len(trainEmbeddings[i])\n",
    "        for j in range(deficit):\n",
    "            trainEmbeddings[i].append(torch.zeros((1,768), dtype=torch.float32, device='cuda:0'))\n",
    "    temp = torch.empty(50,768, dtype=torch.float32, device='cuda:0')\n",
    "    for j in range(len(trainEmbeddings[i])):\n",
    "        temp[j][:] = trainEmbeddings[i][j]\n",
    "    trainEmbeddings[i] = temp"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "for i in testEmbeddings:\n",
    "    if len(testEmbeddings[i]) >=50:\n",
    "        testEmbeddings[i] = testEmbeddings[i][0:50]\n",
    "    else:\n",
    "        deficit = 50 - len(testEmbeddings[i])\n",
    "        for j in range(deficit):\n",
    "            testEmbeddings[i].append(torch.zeros((1,768), dtype=torch.float32, device='cuda:0'))\n",
    "    temp = torch.empty(50,768, dtype=torch.float32, device='cuda:0')\n",
    "    for j in range(len(testEmbeddings[i])):\n",
    "        temp[j][:] = testEmbeddings[i][j]\n",
    "    testEmbeddings[i] = temp"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### image embeddings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('politifact/train_vgg_poli.pickle', 'rb') as f:\n",
    "    train_vgg_poli = pickle.load(f)\n",
    "with open('politifact/test_vgg_poli.pickle', 'rb') as f:\n",
    "    test_vgg_poli = pickle.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_text = [] # text embeddings\n",
    "train_label = [] # labels\n",
    "test_text = [] # text embeddings\n",
    "test_label = []\n",
    "train_image = [] # image embeddings\n",
    "test_image = []\n",
    "\n",
    "# Train Image IDs\n",
    "# Test Image IDs\n",
    "trainImageNames = [] # names of the images i.e name.jpg\n",
    "trainTextNames = []  # train articles\n",
    "testTextNames = []   # test articles\n",
    "testImageNames = []  # names of the images in the test folder"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# match code\n",
    "# Check whether Image exists, then get corresponding Text Embeddings, and finally append to respective lists\n",
    "for i in train_vgg_poli:\n",
    "    for j in trainData:\n",
    "        if i.split('.jpg')[0] == trainData[j][-1]['Top_img']:\n",
    "            if j in trainEmbeddings:\n",
    "                trainImageNames.append(i)\n",
    "                trainTextNames.append(j)\n",
    "                train_text.append(trainEmbeddings[j])\n",
    "                train_image.append(train_vgg_poli[trainData[j][-1]['Top_img'] + '.jpg'])\n",
    "                train_label.append(trainData[j][0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "for i in test_vgg_poli:\n",
    "    for j in testData:\n",
    "        if i.split('.jpg')[0] == testData[j][-1]['Top_img']:\n",
    "            if j in testEmbeddings:\n",
    "                testImageNames.append(i)\n",
    "                testTextNames.append(j)\n",
    "                test_text.append(testEmbeddings[j])\n",
    "                test_image.append(test_vgg_poli[testData[j][-1]['Top_img'] + '.jpg'])\n",
    "                test_label.append(testData[j][0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(381, 104)"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(trainTextNames), len(testTextNames)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(381, 104)"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(trainImageNames), len(testImageNames)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "321 164\n"
     ]
    }
   ],
   "source": [
    "realCount = 0\n",
    "fakeCount = 0\n",
    "\n",
    "for i in train_label:\n",
    "    if i == 1:\n",
    "        realCount += 1\n",
    "    elif i == 0:\n",
    "        fakeCount += 1\n",
    "\n",
    "for i in test_label:\n",
    "    if i == 1:\n",
    "        realCount += 1\n",
    "    elif i == 0:\n",
    "        fakeCount += 1\n",
    "\n",
    "print(realCount, fakeCount)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "df=pd.DataFrame()\n",
    "df['article']=testTextNames\n",
    "df['image']=testImageNames\n",
    "df['label']=test_label\n",
    "\n",
    "\n",
    "# df['article']=trainTextNames\n",
    "# df['image']=trainImageNames\n",
    "# df['label']=train_label"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.to_csv('/media/data_dump_2/Shivangi/baseline_models/politifact_test.csv', sep='\\t')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# saving updated final dataset in the csv files for future reference\n",
    "import pandas as pd\n",
    "df=pd.DataFrame()\n",
    "# df['train_articles']=trainTextNames\n",
    "# df.to_csv('politifact_train_articles.csv', index=False)\n",
    "\n",
    "# df['test_articles']=testTextNames\n",
    "# df.to_csv('politifact_test_articles.csv', index=False)\n",
    "\n",
    "df['train_label']=train_label\n",
    "df.to_csv('politifact_train_label.csv', index=False)\n",
    "\n",
    "# df['test_label']=test_label\n",
    "# df.to_csv('politifact_test_label.csv', index=False)\n",
    "\n",
    "# df['train_image']=trainImageNames\n",
    "# df.to_csv('politifact_train_image.csv', index=False)\n",
    "\n",
    "# df['test_image']=testImageNames\n",
    "# df.to_csv('politifact_test_image.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_label = to_categorical(train_label)\n",
    "test_label = to_categorical(test_label)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_text=[torch.Tensor.numpy(i.cpu()) for i in train_text]\n",
    "test_text=[torch.Tensor.numpy(i.cpu()) for i in test_text]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_text_matrix = np.ndarray(shape=(len(train_text), 50,768))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(381, 50, 768)"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_text_matrix.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(104, 50, 768)"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test_text_matrix.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1, 4096)"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_image[0].shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(381, 4096, 1)"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_image_matrix .shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(104, 4096, 1)"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test_image_matrix.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### multimodal: XLNET + dense layer + VGG"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2024-03-29 18:11:13.814952: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n",
      "2024-03-29 18:11:13.815179: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2211] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.\n",
      "Skipping registering GPU devices...\n",
      "2024-03-29 18:11:13.841915: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 153600000 exceeds 10% of free system memory.\n",
      "2024-03-29 18:11:14.059118: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 153600000 exceeds 10% of free system memory.\n",
      "2024-03-29 18:11:14.093303: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 153600000 exceeds 10% of free system memory.\n",
      "2024-03-29 18:11:14.209096: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 32768000 exceeds 10% of free system memory.\n",
      "2024-03-29 18:11:14.256548: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 32768000 exceeds 10% of free system memory.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Model: \"model\"\n",
      "__________________________________________________________________________________________________\n",
      " Layer (type)                Output Shape                 Param #   Connected to                  \n",
      "==================================================================================================\n",
      " input_1 (InputLayer)        [(None, 50, 768)]            0         []                            \n",
      "                                                                                                  \n",
      " flatten (Flatten)           (None, 38400)                0         ['input_1[0][0]']             \n",
      "                                                                                                  \n",
      " input_2 (InputLayer)        [(None, 4096)]               0         []                            \n",
      "                                                                                                  \n",
      " dense (Dense)               (None, 1000)                 3840100   ['flatten[0][0]']             \n",
      "                                                          0                                       \n",
      "                                                                                                  \n",
      " dense_3 (Dense)             (None, 2000)                 8194000   ['input_2[0][0]']             \n",
      "                                                                                                  \n",
      " dense_1 (Dense)             (None, 500)                  500500    ['dense[0][0]']               \n",
      "                                                                                                  \n",
      " dense_4 (Dense)             (None, 1000)                 2001000   ['dense_3[0][0]']             \n",
      "                                                                                                  \n",
      " dense_2 (Dense)             (None, 100)                  50100     ['dense_1[0][0]']             \n",
      "                                                                                                  \n",
      " dense_5 (Dense)             (None, 100)                  100100    ['dense_4[0][0]']             \n",
      "                                                                                                  \n",
      " batch_normalization (Batch  (None, 100)                  400       ['dense_2[0][0]']             \n",
      " Normalization)                                                                                   \n",
      "                                                                                                  \n",
      " batch_normalization_1 (Bat  (None, 100)                  400       ['dense_5[0][0]']             \n",
      " chNormalization)                                                                                 \n",
      "                                                                                                  \n",
      " dropout (Dropout)           (None, 100)                  0         ['batch_normalization[0][0]'] \n",
      "                                                                                                  \n",
      " dropout_1 (Dropout)         (None, 100)                  0         ['batch_normalization_1[0][0]'\n",
      "                                                                    ]                             \n",
      "                                                                                                  \n",
      " concatenate (Concatenate)   (None, 200)                  0         ['dropout[0][0]',             \n",
      "                                                                     'dropout_1[0][0]']           \n",
      "                                                                                                  \n",
      " dense_6 (Dense)             (None, 200)                  40200     ['concatenate[0][0]']         \n",
      "                                                                                                  \n",
      " dense_7 (Dense)             (None, 100)                  20100     ['dense_6[0][0]']             \n",
      "                                                                                                  \n",
      " dense_8 (Dense)             (None, 50)                   5050      ['dense_7[0][0]']             \n",
      "                                                                                                  \n",
      " dropout_2 (Dropout)         (None, 50)                   0         ['dense_8[0][0]']             \n",
      "                                                                                                  \n",
      " dense_9 (Dense)             (None, 2)                    102       ['dropout_2[0][0]']           \n",
      "                                                                                                  \n",
      "==================================================================================================\n",
      "Total params: 49312952 (188.11 MB)\n",
      "Trainable params: 49312552 (188.11 MB)\n",
      "Non-trainable params: 400 (1.56 KB)\n",
      "__________________________________________________________________________________________________\n"
     ]
    }
   ],
   "source": [
    "input_text = Input(shape=(50,768))\n",
    "text_flat = Flatten()(input_text)\n",
    "dense_text = Dense(1000,activation='relu',kernel_regularizer=regularizers.l2(0.01), kernel_initializer=initializers.he_normal(seed=0))(text_flat)\n",
    "#dense_text = Dropout(0.4)(dense_text)\n",
    "dense_text = Dense(500,activation='relu',kernel_regularizer=regularizers.l2(0.01), kernel_initializer=initializers.he_normal(seed=0))(dense_text)\n",
    "#dense_text = Dropout(0.4)(dense_text)\n",
    "dense_text = Dense(100,activation='relu',kernel_regularizer=regularizers.l2(0.01), kernel_initializer=initializers.he_normal(seed=0))(dense_text)\n",
    "dense_text = BatchNormalization()(dense_text)\n",
    "dense_text_drop = Dropout(0.4)(dense_text)\n",
    "\n",
    "input_image = Input(shape=(4096,))\n",
    "dense_image = Dense(2000,activation='relu',kernel_regularizer=regularizers.l2(0.01), kernel_initializer=initializers.he_normal(seed=0))(input_image)\n",
    "#dense_image = Dropout(0.4)(dense_image)\n",
    "dense_image = Dense(1000, activation='relu',kernel_regularizer=regularizers.l2(0.01), kernel_initializer=initializers.he_normal(seed=0))(dense_image)\n",
    "#dense_image = Dropout(0.4)(dense_image)\n",
    "dense_image = Dense(100,activation='relu',kernel_regularizer=regularizers.l2(0.01), kernel_initializer=initializers.he_normal(seed=0))(dense_image)\n",
    "dense_image = BatchNormalization()(dense_image)\n",
    "dense_image_drop = Dropout(0.4)(dense_image)\n",
    "\n",
    "concat = concatenate([dense_text_drop,dense_image_drop])\n",
    "\n",
    "inter1_dense = Dense(200,activation='relu',kernel_regularizer=regularizers.l2(0.01), kernel_initializer=initializers.he_normal(seed=0))(concat)\n",
    "inter1_dense = Dense(100,activation='relu',kernel_regularizer=regularizers.l2(0.01), kernel_initializer=initializers.he_normal(seed=0))(inter1_dense)\n",
    "final_dense = Dense(50,activation='relu',kernel_regularizer=regularizers.l2(0.01), kernel_initializer=initializers.he_normal(seed=0))(inter1_dense)\n",
    "final_dropout = Dropout(0.4)(final_dense)\n",
    "output = Dense(2, activation='softmax')(final_dropout)\n",
    "\n",
    "model = Model(inputs=[input_text,input_image], outputs=output)\n",
    "adam = optimizers.Adam(learning_rate=1e-4)\n",
    "#adagrad = optimizers.Adagrad(lr=1e-4)\n",
    "#adamax = optimizers.Adamax(lr=0.002, beta_1=0.9, beta_2=0.999)\n",
    "checkpoint = ModelCheckpoint(filepath='../checkpoints_polity/dense_MM_model.keras', monitor='val_acc', verbose=1, save_best_only=True, mode='max')\n",
    "callbacks_list = [checkpoint]\n",
    "\n",
    "#sgd = optimizers.SGD(lr=1e-4, clipnorm=1.)\n",
    "model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy'])\n",
    "model.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/zhengpeng/home/zhengpeng/miniconda3/envs/tensorflow/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n",
      "/home/zhengpeng/home/zhengpeng/miniconda3/envs/tensorflow/lib/python3.9/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.\n",
      "  warnings.warn(\n",
      "/home/zhengpeng/home/zhengpeng/miniconda3/envs/tensorflow/lib/python3.9/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=VGG19_Weights.IMAGENET1K_V1`. You can also use `weights=VGG19_Weights.DEFAULT` to get the most up-to-date weights.\n",
      "  warnings.warn(msg)\n",
      "/home/zhengpeng/home/zhengpeng/miniconda3/envs/tensorflow/lib/python3.9/site-packages/transformers/tokenization_utils_base.py:2619: FutureWarning: The `pad_to_max_length` argument is deprecated and will be removed in a future version, use `padding=True` or `padding='longest'` to pad to the longest sequence in the batch, or use `padding='max_length'` to pad to a max length. In this case, you can give a specific length with `max_length` (e.g. `max_length=45`) or leave max_length to None to pad to the maximal input size of the model (e.g. 512 for Bert).\n",
      "  warnings.warn(\n"
     ]
    }
   ],
   "source": [
    "from data import DataProcess\n",
    "\n",
    "dp_train = DataProcess(\"/root/autodl-tmp/fakeddit/train.tsv\",\"/root/autodl-tmp/fakeddit\",100)\n",
    "train_text_matrix, train_image_matrix, train_label = dp_train.getdata()\n",
    "dp_test = DataProcess(\"/root/autodl-tmp/fakeddit/train.tsv\",\"/root/autodl-tmp/fakeddit\",100)\n",
    "test_text_matrix, test_image_matrix, test_label = dp_test.getdata()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['AlexNet', 'AlexNet_Weights', 'ConvNeXt', 'ConvNeXt_Base_Weights', 'ConvNeXt_Large_Weights', 'ConvNeXt_Small_Weights', 'ConvNeXt_Tiny_Weights', 'DenseNet', 'DenseNet121_Weights', 'DenseNet161_Weights', 'DenseNet169_Weights', 'DenseNet201_Weights', 'EfficientNet', 'EfficientNet_B0_Weights', 'EfficientNet_B1_Weights', 'EfficientNet_B2_Weights', 'EfficientNet_B3_Weights', 'EfficientNet_B4_Weights', 'EfficientNet_B5_Weights', 'EfficientNet_B6_Weights', 'EfficientNet_B7_Weights', 'EfficientNet_V2_L_Weights', 'EfficientNet_V2_M_Weights', 'EfficientNet_V2_S_Weights', 'GoogLeNet', 'GoogLeNetOutputs', 'GoogLeNet_Weights', 'Inception3', 'InceptionOutputs', 'Inception_V3_Weights', 'MNASNet', 'MNASNet0_5_Weights', 'MNASNet0_75_Weights', 'MNASNet1_0_Weights', 'MNASNet1_3_Weights', 'MaxVit', 'MaxVit_T_Weights', 'MobileNetV2', 'MobileNetV3', 'MobileNet_V2_Weights', 'MobileNet_V3_Large_Weights', 'MobileNet_V3_Small_Weights', 'RegNet', 'RegNet_X_16GF_Weights', 'RegNet_X_1_6GF_Weights', 'RegNet_X_32GF_Weights', 'RegNet_X_3_2GF_Weights', 'RegNet_X_400MF_Weights', 'RegNet_X_800MF_Weights', 'RegNet_X_8GF_Weights', 'RegNet_Y_128GF_Weights', 'RegNet_Y_16GF_Weights', 'RegNet_Y_1_6GF_Weights', 'RegNet_Y_32GF_Weights', 'RegNet_Y_3_2GF_Weights', 'RegNet_Y_400MF_Weights', 'RegNet_Y_800MF_Weights', 'RegNet_Y_8GF_Weights', 'ResNeXt101_32X8D_Weights', 'ResNeXt101_64X4D_Weights', 'ResNeXt50_32X4D_Weights', 'ResNet', 'ResNet101_Weights', 'ResNet152_Weights', 'ResNet18_Weights', 'ResNet34_Weights', 'ResNet50_Weights', 'ShuffleNetV2', 'ShuffleNet_V2_X0_5_Weights', 'ShuffleNet_V2_X1_0_Weights', 'ShuffleNet_V2_X1_5_Weights', 'ShuffleNet_V2_X2_0_Weights', 'SqueezeNet', 'SqueezeNet1_0_Weights', 'SqueezeNet1_1_Weights', 'SwinTransformer', 'Swin_B_Weights', 'Swin_S_Weights', 'Swin_T_Weights', 'Swin_V2_B_Weights', 'Swin_V2_S_Weights', 'Swin_V2_T_Weights', 'VGG', 'VGG11_BN_Weights', 'VGG11_Weights', 'VGG13_BN_Weights', 'VGG13_Weights', 'VGG16_BN_Weights', 'VGG16_Weights', 'VGG19_BN_Weights', 'VGG19_Weights', 'ViT_B_16_Weights', 'ViT_B_32_Weights', 'ViT_H_14_Weights', 'ViT_L_16_Weights', 'ViT_L_32_Weights', 'VisionTransformer', 'Weights', 'WeightsEnum', 'Wide_ResNet101_2_Weights', 'Wide_ResNet50_2_Weights', '_GoogLeNetOutputs', '_InceptionOutputs', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__', '_api', '_meta', '_utils', 'alexnet', 'convnext', 'convnext_base', 'convnext_large', 'convnext_small', 'convnext_tiny', 'densenet', 'densenet121', 'densenet161', 'densenet169', 'densenet201', 'detection', 'efficientnet', 'efficientnet_b0', 'efficientnet_b1', 'efficientnet_b2', 'efficientnet_b3', 'efficientnet_b4', 'efficientnet_b5', 'efficientnet_b6', 'efficientnet_b7', 'efficientnet_v2_l', 'efficientnet_v2_m', 'efficientnet_v2_s', 'get_model', 'get_model_builder', 'get_model_weights', 'get_weight', 'googlenet', 'inception', 'inception_v3', 'list_models', 'maxvit', 'maxvit_t', 'mnasnet', 'mnasnet0_5', 'mnasnet0_75', 'mnasnet1_0', 'mnasnet1_3', 'mobilenet', 'mobilenet_v2', 'mobilenet_v3_large', 'mobilenet_v3_small', 'mobilenetv2', 'mobilenetv3', 'optical_flow', 'quantization', 'regnet', 'regnet_x_16gf', 'regnet_x_1_6gf', 'regnet_x_32gf', 'regnet_x_3_2gf', 'regnet_x_400mf', 'regnet_x_800mf', 'regnet_x_8gf', 'regnet_y_128gf', 'regnet_y_16gf', 'regnet_y_1_6gf', 'regnet_y_32gf', 'regnet_y_3_2gf', 'regnet_y_400mf', 'regnet_y_800mf', 'regnet_y_8gf', 'resnet', 'resnet101', 'resnet152', 'resnet18', 'resnet34', 'resnet50', 'resnext101_32x8d', 'resnext101_64x4d', 'resnext50_32x4d', 'segmentation', 'shufflenet_v2_x0_5', 'shufflenet_v2_x1_0', 'shufflenet_v2_x1_5', 'shufflenet_v2_x2_0', 'shufflenetv2', 'squeezenet', 'squeezenet1_0', 'squeezenet1_1', 'swin_b', 'swin_s', 'swin_t', 'swin_transformer', 'swin_v2_b', 'swin_v2_s', 'swin_v2_t', 'vgg', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn', 'vgg19', 'vgg19_bn', 'video', 'vision_transformer', 'vit_b_16', 'vit_b_32', 'vit_h_14', 'vit_l_16', 'vit_l_32', 'wide_resnet101_2', 'wide_resnet50_2']\n"
     ]
    }
   ],
   "source": [
    "from torchvision import models\n",
    "\n",
    "print(dir(models))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 1/100\n",
      "1/1 [==============================] - ETA: 0s - loss: 101.7084 - accuracy: 1.0000WARNING:tensorflow:Can save best model only with val_acc available, skipping.\n",
      "1/1 [==============================] - 4s 4s/step - loss: 101.7084 - accuracy: 1.0000 - val_loss: 103.8905 - val_accuracy: 0.0000e+00\n",
      "Epoch 2/100\n",
      "1/1 [==============================] - ETA: 0s - loss: 100.8104 - accuracy: 1.0000WARNING:tensorflow:Can save best model only with val_acc available, skipping.\n",
      "1/1 [==============================] - 1s 693ms/step - loss: 100.8104 - accuracy: 1.0000 - val_loss: 103.0616 - val_accuracy: 0.0000e+00\n",
      "Epoch 3/100\n",
      "1/1 [==============================] - ETA: 0s - loss: 99.9176 - accuracy: 1.0000WARNING:tensorflow:Can save best model only with val_acc available, skipping.\n",
      "1/1 [==============================] - 1s 636ms/step - loss: 99.9176 - accuracy: 1.0000 - val_loss: 102.2528 - val_accuracy: 0.0000e+00\n",
      "Epoch 4/100\n",
      "1/1 [==============================] - ETA: 0s - loss: 99.0328 - accuracy: 1.0000WARNING:tensorflow:Can save best model only with val_acc available, skipping.\n",
      "1/1 [==============================] - 1s 656ms/step - loss: 99.0328 - accuracy: 1.0000 - val_loss: 101.4499 - val_accuracy: 0.0000e+00\n",
      "Epoch 5/100\n",
      "1/1 [==============================] - ETA: 0s - loss: 98.1566 - accuracy: 1.0000WARNING:tensorflow:Can save best model only with val_acc available, skipping.\n",
      "1/1 [==============================] - 1s 645ms/step - loss: 98.1566 - accuracy: 1.0000 - val_loss: 100.6449 - val_accuracy: 0.0000e+00\n",
      "Epoch 6/100\n",
      "1/1 [==============================] - ETA: 0s - loss: 97.2897 - accuracy: 1.0000WARNING:tensorflow:Can save best model only with val_acc available, skipping.\n",
      "1/1 [==============================] - 1s 629ms/step - loss: 97.2897 - accuracy: 1.0000 - val_loss: 99.8352 - val_accuracy: 0.0000e+00\n",
      "Epoch 7/100\n",
      "1/1 [==============================] - ETA: 0s - loss: 96.4322 - accuracy: 1.0000WARNING:tensorflow:Can save best model only with val_acc available, skipping.\n",
      "1/1 [==============================] - 1s 603ms/step - loss: 96.4322 - accuracy: 1.0000 - val_loss: 99.0340 - val_accuracy: 0.0000e+00\n",
      "Epoch 8/100\n"
     ]
    },
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "\u001b[1;32m/media/zhengpeng/epan/program/python/SpotFakePlusAAAI/XLNET_politifact.ipynb Cell 34\u001b[0m line \u001b[0;36m1\n\u001b[0;32m----> <a href='vscode-notebook-cell:/media/zhengpeng/epan/program/python/SpotFakePlusAAAI/XLNET_politifact.ipynb#X43sZmlsZQ%3D%3D?line=0'>1</a>\u001b[0m history \u001b[39m=\u001b[39m model\u001b[39m.\u001b[39;49mfit([train_text_matrix, train_image_matrix],train_label,validation_data\u001b[39m=\u001b[39;49m([test_text_matrix,test_image_matrix],test_label),batch_size \u001b[39m=\u001b[39;49m\u001b[39m32\u001b[39;49m,epochs \u001b[39m=\u001b[39;49m\u001b[39m100\u001b[39;49m,callbacks\u001b[39m=\u001b[39;49mcallbacks_list)\n",
      "File \u001b[0;32m~/home/zhengpeng/miniconda3/envs/tensorflow/lib/python3.9/site-packages/keras/src/utils/traceback_utils.py:65\u001b[0m, in \u001b[0;36mfilter_traceback.<locals>.error_handler\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m     63\u001b[0m filtered_tb \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n\u001b[1;32m     64\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m---> 65\u001b[0m     \u001b[39mreturn\u001b[39;00m fn(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m     66\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m     67\u001b[0m     filtered_tb \u001b[39m=\u001b[39m _process_traceback_frames(e\u001b[39m.\u001b[39m__traceback__)\n",
      "File \u001b[0;32m~/home/zhengpeng/miniconda3/envs/tensorflow/lib/python3.9/site-packages/keras/src/engine/training.py:1783\u001b[0m, in \u001b[0;36mModel.fit\u001b[0;34m(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)\u001b[0m\n\u001b[1;32m   1775\u001b[0m \u001b[39mwith\u001b[39;00m tf\u001b[39m.\u001b[39mprofiler\u001b[39m.\u001b[39mexperimental\u001b[39m.\u001b[39mTrace(\n\u001b[1;32m   1776\u001b[0m     \u001b[39m\"\u001b[39m\u001b[39mtrain\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[1;32m   1777\u001b[0m     epoch_num\u001b[39m=\u001b[39mepoch,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m   1780\u001b[0m     _r\u001b[39m=\u001b[39m\u001b[39m1\u001b[39m,\n\u001b[1;32m   1781\u001b[0m ):\n\u001b[1;32m   1782\u001b[0m     callbacks\u001b[39m.\u001b[39mon_train_batch_begin(step)\n\u001b[0;32m-> 1783\u001b[0m     tmp_logs \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mtrain_function(iterator)\n\u001b[1;32m   1784\u001b[0m     \u001b[39mif\u001b[39;00m data_handler\u001b[39m.\u001b[39mshould_sync:\n\u001b[1;32m   1785\u001b[0m         context\u001b[39m.\u001b[39masync_wait()\n",
      "File \u001b[0;32m~/home/zhengpeng/miniconda3/envs/tensorflow/lib/python3.9/site-packages/tensorflow/python/util/traceback_utils.py:150\u001b[0m, in \u001b[0;36mfilter_traceback.<locals>.error_handler\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    148\u001b[0m filtered_tb \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n\u001b[1;32m    149\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m--> 150\u001b[0m   \u001b[39mreturn\u001b[39;00m fn(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m    151\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m    152\u001b[0m   filtered_tb \u001b[39m=\u001b[39m _process_traceback_frames(e\u001b[39m.\u001b[39m__traceback__)\n",
      "File \u001b[0;32m~/home/zhengpeng/miniconda3/envs/tensorflow/lib/python3.9/site-packages/tensorflow/python/eager/polymorphic_function/polymorphic_function.py:831\u001b[0m, in \u001b[0;36mFunction.__call__\u001b[0;34m(self, *args, **kwds)\u001b[0m\n\u001b[1;32m    828\u001b[0m compiler \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mxla\u001b[39m\u001b[39m\"\u001b[39m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_jit_compile \u001b[39melse\u001b[39;00m \u001b[39m\"\u001b[39m\u001b[39mnonXla\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m    830\u001b[0m \u001b[39mwith\u001b[39;00m OptionalXlaContext(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_jit_compile):\n\u001b[0;32m--> 831\u001b[0m   result \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_call(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwds)\n\u001b[1;32m    833\u001b[0m new_tracing_count \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mexperimental_get_tracing_count()\n\u001b[1;32m    834\u001b[0m without_tracing \u001b[39m=\u001b[39m (tracing_count \u001b[39m==\u001b[39m new_tracing_count)\n",
      "File \u001b[0;32m~/home/zhengpeng/miniconda3/envs/tensorflow/lib/python3.9/site-packages/tensorflow/python/eager/polymorphic_function/polymorphic_function.py:867\u001b[0m, in \u001b[0;36mFunction._call\u001b[0;34m(self, *args, **kwds)\u001b[0m\n\u001b[1;32m    864\u001b[0m   \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_lock\u001b[39m.\u001b[39mrelease()\n\u001b[1;32m    865\u001b[0m   \u001b[39m# In this case we have created variables on the first call, so we run the\u001b[39;00m\n\u001b[1;32m    866\u001b[0m   \u001b[39m# defunned version which is guaranteed to never create variables.\u001b[39;00m\n\u001b[0;32m--> 867\u001b[0m   \u001b[39mreturn\u001b[39;00m tracing_compilation\u001b[39m.\u001b[39;49mcall_function(\n\u001b[1;32m    868\u001b[0m       args, kwds, \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_no_variable_creation_config\n\u001b[1;32m    869\u001b[0m   )\n\u001b[1;32m    870\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_variable_creation_config \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m    871\u001b[0m   \u001b[39m# Release the lock early so that multiple threads can perform the call\u001b[39;00m\n\u001b[1;32m    872\u001b[0m   \u001b[39m# in parallel.\u001b[39;00m\n\u001b[1;32m    873\u001b[0m   \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_lock\u001b[39m.\u001b[39mrelease()\n",
      "File \u001b[0;32m~/home/zhengpeng/miniconda3/envs/tensorflow/lib/python3.9/site-packages/tensorflow/python/eager/polymorphic_function/tracing_compilation.py:139\u001b[0m, in \u001b[0;36mcall_function\u001b[0;34m(args, kwargs, tracing_options)\u001b[0m\n\u001b[1;32m    137\u001b[0m bound_args \u001b[39m=\u001b[39m function\u001b[39m.\u001b[39mfunction_type\u001b[39m.\u001b[39mbind(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[1;32m    138\u001b[0m flat_inputs \u001b[39m=\u001b[39m function\u001b[39m.\u001b[39mfunction_type\u001b[39m.\u001b[39munpack_inputs(bound_args)\n\u001b[0;32m--> 139\u001b[0m \u001b[39mreturn\u001b[39;00m function\u001b[39m.\u001b[39;49m_call_flat(  \u001b[39m# pylint: disable=protected-access\u001b[39;49;00m\n\u001b[1;32m    140\u001b[0m     flat_inputs, captured_inputs\u001b[39m=\u001b[39;49mfunction\u001b[39m.\u001b[39;49mcaptured_inputs\n\u001b[1;32m    141\u001b[0m )\n",
      "File \u001b[0;32m~/home/zhengpeng/miniconda3/envs/tensorflow/lib/python3.9/site-packages/tensorflow/python/eager/polymorphic_function/concrete_function.py:1264\u001b[0m, in \u001b[0;36mConcreteFunction._call_flat\u001b[0;34m(self, tensor_inputs, captured_inputs)\u001b[0m\n\u001b[1;32m   1260\u001b[0m possible_gradient_type \u001b[39m=\u001b[39m gradients_util\u001b[39m.\u001b[39mPossibleTapeGradientTypes(args)\n\u001b[1;32m   1261\u001b[0m \u001b[39mif\u001b[39;00m (possible_gradient_type \u001b[39m==\u001b[39m gradients_util\u001b[39m.\u001b[39mPOSSIBLE_GRADIENT_TYPES_NONE\n\u001b[1;32m   1262\u001b[0m     \u001b[39mand\u001b[39;00m executing_eagerly):\n\u001b[1;32m   1263\u001b[0m   \u001b[39m# No tape is watching; skip to running the function.\u001b[39;00m\n\u001b[0;32m-> 1264\u001b[0m   \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_inference_function\u001b[39m.\u001b[39;49mflat_call(args)\n\u001b[1;32m   1265\u001b[0m forward_backward \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_select_forward_and_backward_functions(\n\u001b[1;32m   1266\u001b[0m     args,\n\u001b[1;32m   1267\u001b[0m     possible_gradient_type,\n\u001b[1;32m   1268\u001b[0m     executing_eagerly)\n\u001b[1;32m   1269\u001b[0m forward_function, args_with_tangents \u001b[39m=\u001b[39m forward_backward\u001b[39m.\u001b[39mforward()\n",
      "File \u001b[0;32m~/home/zhengpeng/miniconda3/envs/tensorflow/lib/python3.9/site-packages/tensorflow/python/eager/polymorphic_function/atomic_function.py:217\u001b[0m, in \u001b[0;36mAtomicFunction.flat_call\u001b[0;34m(self, args)\u001b[0m\n\u001b[1;32m    215\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mflat_call\u001b[39m(\u001b[39mself\u001b[39m, args: Sequence[core\u001b[39m.\u001b[39mTensor]) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Any:\n\u001b[1;32m    216\u001b[0m \u001b[39m  \u001b[39m\u001b[39m\"\"\"Calls with tensor inputs and returns the structured output.\"\"\"\u001b[39;00m\n\u001b[0;32m--> 217\u001b[0m   flat_outputs \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m(\u001b[39m*\u001b[39;49margs)\n\u001b[1;32m    218\u001b[0m   \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mfunction_type\u001b[39m.\u001b[39mpack_output(flat_outputs)\n",
      "File \u001b[0;32m~/home/zhengpeng/miniconda3/envs/tensorflow/lib/python3.9/site-packages/tensorflow/python/eager/polymorphic_function/atomic_function.py:252\u001b[0m, in \u001b[0;36mAtomicFunction.__call__\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m    250\u001b[0m \u001b[39mwith\u001b[39;00m record\u001b[39m.\u001b[39mstop_recording():\n\u001b[1;32m    251\u001b[0m   \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_bound_context\u001b[39m.\u001b[39mexecuting_eagerly():\n\u001b[0;32m--> 252\u001b[0m     outputs \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_bound_context\u001b[39m.\u001b[39;49mcall_function(\n\u001b[1;32m    253\u001b[0m         \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mname,\n\u001b[1;32m    254\u001b[0m         \u001b[39mlist\u001b[39;49m(args),\n\u001b[1;32m    255\u001b[0m         \u001b[39mlen\u001b[39;49m(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mfunction_type\u001b[39m.\u001b[39;49mflat_outputs),\n\u001b[1;32m    256\u001b[0m     )\n\u001b[1;32m    257\u001b[0m   \u001b[39melse\u001b[39;00m:\n\u001b[1;32m    258\u001b[0m     outputs \u001b[39m=\u001b[39m make_call_op_in_graph(\n\u001b[1;32m    259\u001b[0m         \u001b[39mself\u001b[39m,\n\u001b[1;32m    260\u001b[0m         \u001b[39mlist\u001b[39m(args),\n\u001b[1;32m    261\u001b[0m         \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_bound_context\u001b[39m.\u001b[39mfunction_call_options\u001b[39m.\u001b[39mas_attrs(),\n\u001b[1;32m    262\u001b[0m     )\n",
      "File \u001b[0;32m~/home/zhengpeng/miniconda3/envs/tensorflow/lib/python3.9/site-packages/tensorflow/python/eager/context.py:1479\u001b[0m, in \u001b[0;36mContext.call_function\u001b[0;34m(self, name, tensor_inputs, num_outputs)\u001b[0m\n\u001b[1;32m   1477\u001b[0m cancellation_context \u001b[39m=\u001b[39m cancellation\u001b[39m.\u001b[39mcontext()\n\u001b[1;32m   1478\u001b[0m \u001b[39mif\u001b[39;00m cancellation_context \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m-> 1479\u001b[0m   outputs \u001b[39m=\u001b[39m execute\u001b[39m.\u001b[39;49mexecute(\n\u001b[1;32m   1480\u001b[0m       name\u001b[39m.\u001b[39;49mdecode(\u001b[39m\"\u001b[39;49m\u001b[39mutf-8\u001b[39;49m\u001b[39m\"\u001b[39;49m),\n\u001b[1;32m   1481\u001b[0m       num_outputs\u001b[39m=\u001b[39;49mnum_outputs,\n\u001b[1;32m   1482\u001b[0m       inputs\u001b[39m=\u001b[39;49mtensor_inputs,\n\u001b[1;32m   1483\u001b[0m       attrs\u001b[39m=\u001b[39;49mattrs,\n\u001b[1;32m   1484\u001b[0m       ctx\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m,\n\u001b[1;32m   1485\u001b[0m   )\n\u001b[1;32m   1486\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m   1487\u001b[0m   outputs \u001b[39m=\u001b[39m execute\u001b[39m.\u001b[39mexecute_with_cancellation(\n\u001b[1;32m   1488\u001b[0m       name\u001b[39m.\u001b[39mdecode(\u001b[39m\"\u001b[39m\u001b[39mutf-8\u001b[39m\u001b[39m\"\u001b[39m),\n\u001b[1;32m   1489\u001b[0m       num_outputs\u001b[39m=\u001b[39mnum_outputs,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m   1493\u001b[0m       cancellation_manager\u001b[39m=\u001b[39mcancellation_context,\n\u001b[1;32m   1494\u001b[0m   )\n",
      "File \u001b[0;32m~/home/zhengpeng/miniconda3/envs/tensorflow/lib/python3.9/site-packages/tensorflow/python/eager/execute.py:60\u001b[0m, in \u001b[0;36mquick_execute\u001b[0;34m(op_name, num_outputs, inputs, attrs, ctx, name)\u001b[0m\n\u001b[1;32m     53\u001b[0m   \u001b[39m# Convert any objects of type core_types.Tensor to Tensor.\u001b[39;00m\n\u001b[1;32m     54\u001b[0m   inputs \u001b[39m=\u001b[39m [\n\u001b[1;32m     55\u001b[0m       tensor_conversion_registry\u001b[39m.\u001b[39mconvert(t)\n\u001b[1;32m     56\u001b[0m       \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(t, core_types\u001b[39m.\u001b[39mTensor)\n\u001b[1;32m     57\u001b[0m       \u001b[39melse\u001b[39;00m t\n\u001b[1;32m     58\u001b[0m       \u001b[39mfor\u001b[39;00m t \u001b[39min\u001b[39;00m inputs\n\u001b[1;32m     59\u001b[0m   ]\n\u001b[0;32m---> 60\u001b[0m   tensors \u001b[39m=\u001b[39m pywrap_tfe\u001b[39m.\u001b[39;49mTFE_Py_Execute(ctx\u001b[39m.\u001b[39;49m_handle, device_name, op_name,\n\u001b[1;32m     61\u001b[0m                                       inputs, attrs, num_outputs)\n\u001b[1;32m     62\u001b[0m \u001b[39mexcept\u001b[39;00m core\u001b[39m.\u001b[39m_NotOkStatusException \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m     63\u001b[0m   \u001b[39mif\u001b[39;00m name \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "history = model.fit([train_text_matrix, train_image_matrix],train_label,validation_data=([test_text_matrix,test_image_matrix],test_label),batch_size =32,epochs =100,callbacks=callbacks_list)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('XL_poli_history.json', 'w') as f:\n",
    "    json.dump(str(history.history), f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.plot(history.history['acc'])\n",
    "plt.plot(history.history['val_acc'])\n",
    "plt.title('Model accuracy')\n",
    "plt.ylabel('Accuracy')\n",
    "plt.xlabel('Epoch')\n",
    "plt.legend(['Train', 'Test'], loc='upper left')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.plot(history.history['loss'])\n",
    "plt.plot(history.history['val_loss'])\n",
    "plt.title('')\n",
    "plt.ylabel('Loss')\n",
    "plt.xlabel('Epoch')\n",
    "plt.legend(['Train', 'Test'], loc='upper left')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### XLNET+ dense layer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def baseline_model():\n",
    "    model = Sequential()\n",
    "    model.add(Flatten(input_shape=(50,768)))\n",
    "    model.add(Dense(1000, activation='relu', kernel_regularizer=regularizers.l2(0.01)))\n",
    "    #model.add(Dropout(0.4))\n",
    "    model.add(Dense(500, activation='relu', kernel_regularizer=regularizers.l2(0.01)))\n",
    "    #model.add(Dropout(0.4))\n",
    "    model.add(Dense(100, activation='relu', kernel_regularizer=regularizers.l2(0.01)))\n",
    "    #model.add(BatchNormalization())\n",
    "    model.add(Dropout(0.4))\n",
    "    model.add(Dense(2, activation='softmax'))\n",
    "    #adam = optimizers.Adam(lr=1e-4)\n",
    "    #adamax = optimizers.Adamax(learning_rate=0.002, beta_1=0.9, beta_2=0.999)\n",
    "    sgd = optimizers.SGD(lr=0.001, clipnorm=1.)\n",
    "    #adadelta = optimizers.Adadelta(lr=1.0, rho=0.95)\n",
    "    model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])\n",
    "    return model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "checkpoint = ModelCheckpoint(filepath='../checkpoints_polity/dense_Text_model.hdf5', monitor='val_acc', verbose=1, save_best_only=True, mode='max')\n",
    "callbacks_list = [checkpoint]\n",
    "model = baseline_model()\n",
    "model.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tempHist = model.fit(train_text_matrix,train_label,validation_data=(test_text_matrix,test_label),batch_size =32,epochs =100,callbacks=callbacks_list)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### XLNET + LSTM "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from keras.models import Sequential\n",
    "from keras.layers import Dense, Dropout, Conv1D,MaxPooling1D,Flatten\n",
    "from keras.layers import LSTM, Bidirectional\n",
    "from keras.optimizers import RMSprop"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def lstm():\n",
    "    model = Sequential()\n",
    "    model.add(LSTM(128,input_shape=(50,768)))\n",
    "    model.add(Dropout(0.5))\n",
    "    model.add(Dense(595, activation='relu'))\n",
    "    model.add(Dense(2, activation='softmax'))\n",
    "    model.compile(loss='categorical_crossentropy',optimizer='sgd',metrics=['accuracy'])\n",
    "    return model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "checkpoint = ModelCheckpoint(filepath='../checkpoints_polity/lstm_text_model.hdf5', monitor='val_acc', verbose=1, save_best_only=True, mode='max')\n",
    "callbacks_list = [checkpoint]\n",
    "model = lstm()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.fit(train_text_matrix,train_label,validation_data=(test_text_matrix,test_label),batch_size =32,epochs =100, callbacks=callbacks_list)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### XLNET + CNN"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def cnn_model():\n",
    "    model = Sequential()\n",
    "    model.add(Conv1D(filters=3, kernel_size=5, activation='relu',data_format='channels_first' , input_shape=(50,768)))\n",
    "    model.add(MaxPooling1D(pool_size=2))\n",
    "    model.add(Dropout(0.5))\n",
    "    model.add(Conv1D(filters=3, kernel_size=5, activation='relu',data_format='channels_first' ))\n",
    "    model.add(MaxPooling1D(pool_size=2))\n",
    "    model.add(Dropout(0.5))\n",
    "    model.add(Flatten())\n",
    "    model.add(Dense(500, activation='relu'))\n",
    "    model.add(Dropout(0.5))\n",
    "    model.add(Dense(100, activation='relu'))\n",
    "    model.add(Dropout(0.5))\n",
    "    model.add(Dense(2, activation='softmax'))\n",
    "    model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])\n",
    "    return model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model=cnn_model()\n",
    "checkpoint = ModelCheckpoint(filepath='../checkpoints_polity/cnn_text_model.hdf5', monitor='val_acc', verbose=1, save_best_only=True, mode='max')\n",
    "callbacks_list = [checkpoint]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.fit(train_text_matrix,train_label,validation_data=(test_text_matrix,test_label),batch_size =32,epochs =100, callbacks=callbacks_list)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.15"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
